# Notebook setup: imports, the shared Network object, and a dict of DataFrames.
# The star import is what brings `Network` and `clustergrammer_widget` into
# scope for the next line (no other import here provides those names).
from clustergrammer_widget import *
# Shared network object reused by every load/cluster/widget step below.
net = Network(clustergrammer_widget)
# Holds the DataFrames produced along the way, keyed by short names
# ('ini', 'cat_sig', 'pred_cat', ...).
df = {}
import clustergrammer_groupby as cby
# NOTE(review): gene_exp_10x, f1_score, pd, np and plt are not referenced in
# the visible part of this notebook -- confirm before pruning.
import gene_exp_10x
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
# IPython line magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Load the data file into the shared network object and keep an exported
# DataFrame copy under df['ini'] for the later signature/prediction steps.
net.load_file('../data/mnist.txt')
df['ini'] = net.export_df()
# Bare expression: shows the shape only when it is the last statement of a
# notebook cell; it has no visible effect when run as a plain script.
df['ini'].shape
# NOTE(review): this feeds the frame that was just exported back into `net`;
# it looks redundant right after load_file/export_df -- confirm.
net.load_df(df['ini'])
# Cluster the loaded matrix and display the interactive widget.
net.cluster()
net.widget()
# Derive signatures for the 'Digit' category from the initial matrix
# (num_top_dims=15), then use them to predict categories and score the result.
signature_outputs = cby.generate_signatures(
    df['ini'], 'Digit', num_top_dims=15
)
df['cat_sig'], keep_genes, keep_genes_dict = signature_outputs

# Predict a category for the initial data from the signatures.
prediction_outputs = cby.predict_cats_from_sigs(df['ini'], df['cat_sig'])
df['pred_cat'], df['sig_sim'], df['sig_max'], y_info = prediction_outputs

# Confusion matrix plus per-sample correctness and the overall fraction correct.
(
    df['conf'],
    true_count,
    pred_count,
    ser_correct,
    fraction_correct,
) = cby.confusion_matrix_and_correct_series(y_info)

# Keep the real-label score under its own name so later steps can compare
# against it.
real_fraction_correct = deepcopy(fraction_correct)
print(real_fraction_correct)

# Bare expression: displayed only when last in a notebook cell.
df['cat_sig'].shape

# Visualize the signature matrix with the shared network object.
net.load_df(df['cat_sig'])
net.cluster()
net.widget()
%%time
num_shuffles = 100
perform_ser = cby.compare_performance_to_shuffled_labels(df['ini'], df['cat_sig'], 'Digit', num_shuffles=num_shuffles)
print('mean: ', perform_ser.mean(), 'std: ', perform_ser.std())
real_performance = perform_ser[perform_ser > real_fraction_correct].shape[0]/num_shuffles
print('real labels perform in the top ' + str(real_performance*100) + '% of shuffled labels')
perform_ser.hist()